In [1]:
!pip install plotly
!pip install datetime
Requirement already satisfied: plotly in ./miniconda3/lib/python3.11/site-packages (5.18.0) Requirement already satisfied: tenacity>=6.2.0 in ./miniconda3/lib/python3.11/site-packages (from plotly) (8.2.3) Requirement already satisfied: packaging in ./miniconda3/lib/python3.11/site-packages (from plotly) (23.1) Requirement already satisfied: datetime in ./miniconda3/lib/python3.11/site-packages (5.4) Requirement already satisfied: zope.interface in ./miniconda3/lib/python3.11/site-packages (from datetime) (6.1) Requirement already satisfied: pytz in ./miniconda3/lib/python3.11/site-packages (from datetime) (2023.3.post1) Requirement already satisfied: setuptools in ./miniconda3/lib/python3.11/site-packages (from zope.interface->datetime) (68.0.0)
In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime
In [3]:
covid_df = pd.read_csv("/Users/sauravthakur/Desktop/Data Analysis data sets/covid_19_india.csv")
covid_df.head(10)
Out[3]:
| Sno | Date | Time | State/UnionTerritory | ConfirmedIndianNational | ConfirmedForeignNational | Cured | Deaths | Confirmed | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | 2020-01-30 | 6:00 PM | Kerala | 1 | 0 | 0.0 | 0.0 | 1.0 |
| 1 | 2.0 | 2020-01-31 | 6:00 PM | Kerala | 1 | 0 | 0.0 | 0.0 | 1.0 |
| 2 | 3.0 | 2020-02-01 | 6:00 PM | Kerala | 2 | 0 | 0.0 | 0.0 | 2.0 |
| 3 | 4.0 | 2020-02-02 | 6:00 PM | Kerala | 3 | 0 | 0.0 | 0.0 | 3.0 |
| 4 | 5.0 | 2020-02-03 | 6:00 PM | Kerala | 3 | 0 | 0.0 | 0.0 | 3.0 |
| 5 | 6.0 | 2020-02-04 | 6:00 PM | Kerala | 3 | 0 | 0.0 | 0.0 | 3.0 |
| 6 | 7.0 | 2020-02-05 | 6:00 PM | Kerala | 3 | 0 | 0.0 | 0.0 | 3.0 |
| 7 | 8.0 | 2020-02-06 | 6:00 PM | Kerala | 3 | 0 | 0.0 | 0.0 | 3.0 |
| 8 | 9.0 | 2020-02-07 | 6:00 PM | Kerala | 3 | 0 | 0.0 | 0.0 | 3.0 |
| 9 | 10.0 | 2020-02-08 | 6:00 PM | Kerala | 3 | 0 | 0.0 | 0.0 | 3.0 |
In [4]:
covid_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 15114 entries, 0 to 15113 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Sno 15086 non-null float64 1 Date 15086 non-null object 2 Time 15086 non-null object 3 State/UnionTerritory 15086 non-null object 4 ConfirmedIndianNational 15086 non-null object 5 ConfirmedForeignNational 15086 non-null object 6 Cured 15086 non-null float64 7 Deaths 15086 non-null float64 8 Confirmed 15086 non-null float64 dtypes: float64(4), object(5) memory usage: 1.0+ MB
In [5]:
covid_df.describe()
Out[5]:
| Sno | Cured | Deaths | Confirmed | |
|---|---|---|---|---|
| count | 15086.000000 | 1.508600e+04 | 15086.000000 | 1.508600e+04 |
| mean | 7543.500000 | 1.747937e+05 | 2721.084449 | 1.942820e+05 |
| std | 4355.097416 | 3.648330e+05 | 7182.672358 | 4.095184e+05 |
| min | 1.000000 | 0.000000e+00 | 0.000000 | 0.000000e+00 |
| 25% | 3772.250000 | 1.685000e+03 | 12.000000 | 2.935500e+03 |
| 50% | 7543.500000 | 1.964700e+04 | 364.000000 | 2.608150e+04 |
| 75% | 11314.750000 | 2.087552e+05 | 2170.000000 | 2.216012e+05 |
| max | 15086.000000 | 4.927480e+06 | 83777.000000 | 5.433506e+06 |
In [6]:
vaccine_df = pd.read_csv("/Users/sauravthakur/Desktop/Data Analysis data sets/covid_vaccine_statewise.csv")
vaccine_df.head(10)
Out[6]:
| Updated On | State | Total Doses Administered | Sessions | Sites | First Dose Administered | Second Dose Administered | Male (Doses Administered) | Female (Doses Administered) | Transgender (Doses Administered) | ... | 18-44 Years (Doses Administered) | 45-60 Years (Doses Administered) | 60+ Years (Doses Administered) | 18-44 Years(Individuals Vaccinated) | 45-60 Years(Individuals Vaccinated) | 60+ Years(Individuals Vaccinated) | Male(Individuals Vaccinated) | Female(Individuals Vaccinated) | Transgender(Individuals Vaccinated) | Total Individuals Vaccinated | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 16/01/2021 | India | 48276.0 | 3455.0 | 2957.0 | 48276.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 23757.0 | 24517.0 | 2.0 | 48276.0 |
| 1 | 17/01/2021 | India | 58604.0 | 8532.0 | 4954.0 | 58604.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 27348.0 | 31252.0 | 4.0 | 58604.0 |
| 2 | 18/01/2021 | India | 99449.0 | 13611.0 | 6583.0 | 99449.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 41361.0 | 58083.0 | 5.0 | 99449.0 |
| 3 | 19/01/2021 | India | 195525.0 | 17855.0 | 7951.0 | 195525.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 81901.0 | 113613.0 | 11.0 | 195525.0 |
| 4 | 20/01/2021 | India | 251280.0 | 25472.0 | 10504.0 | 251280.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 98111.0 | 153145.0 | 24.0 | 251280.0 |
| 5 | 21/01/2021 | India | 365965.0 | 32226.0 | 12600.0 | 365965.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 132784.0 | 233143.0 | 38.0 | 365965.0 |
| 6 | 22/01/2021 | India | 549381.0 | 36988.0 | 14115.0 | 549381.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 193899.0 | 355402.0 | 80.0 | 549381.0 |
| 7 | 23/01/2021 | India | 759008.0 | 43076.0 | 15605.0 | 759008.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 267856.0 | 491049.0 | 103.0 | 759008.0 |
| 8 | 24/01/2021 | India | 835058.0 | 49851.0 | 18111.0 | 835058.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 296283.0 | 538647.0 | 128.0 | 835058.0 |
| 9 | 25/01/2021 | India | 1277104.0 | 55151.0 | 19682.0 | 1277104.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 444137.0 | 832766.0 | 201.0 | 1277104.0 |
10 rows × 24 columns
In [7]:
vaccine_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 7845 entries, 0 to 7844 Data columns (total 24 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Updated On 7845 non-null object 1 State 7845 non-null object 2 Total Doses Administered 7621 non-null float64 3 Sessions 7621 non-null float64 4 Sites 7621 non-null float64 5 First Dose Administered 7621 non-null float64 6 Second Dose Administered 7621 non-null float64 7 Male (Doses Administered) 7461 non-null float64 8 Female (Doses Administered) 7461 non-null float64 9 Transgender (Doses Administered) 7461 non-null float64 10 Covaxin (Doses Administered) 7621 non-null float64 11 CoviShield (Doses Administered) 7621 non-null float64 12 Sputnik V (Doses Administered) 2995 non-null float64 13 AEFI 5438 non-null float64 14 18-44 Years (Doses Administered) 1702 non-null float64 15 45-60 Years (Doses Administered) 1702 non-null float64 16 60+ Years (Doses Administered) 1702 non-null float64 17 18-44 Years(Individuals Vaccinated) 3733 non-null float64 18 45-60 Years(Individuals Vaccinated) 3734 non-null float64 19 60+ Years(Individuals Vaccinated) 3734 non-null float64 20 Male(Individuals Vaccinated) 160 non-null float64 21 Female(Individuals Vaccinated) 160 non-null float64 22 Transgender(Individuals Vaccinated) 160 non-null float64 23 Total Individuals Vaccinated 5919 non-null float64 dtypes: float64(22), object(2) memory usage: 1.4+ MB
In [8]:
vaccine_df.describe()
Out[8]:
| Total Doses Administered | Sessions | Sites | First Dose Administered | Second Dose Administered | Male (Doses Administered) | Female (Doses Administered) | Transgender (Doses Administered) | Covaxin (Doses Administered) | CoviShield (Doses Administered) | ... | 18-44 Years (Doses Administered) | 45-60 Years (Doses Administered) | 60+ Years (Doses Administered) | 18-44 Years(Individuals Vaccinated) | 45-60 Years(Individuals Vaccinated) | 60+ Years(Individuals Vaccinated) | Male(Individuals Vaccinated) | Female(Individuals Vaccinated) | Transgender(Individuals Vaccinated) | Total Individuals Vaccinated | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 7.621000e+03 | 7.621000e+03 | 7621.000000 | 7.621000e+03 | 7.621000e+03 | 7.461000e+03 | 7.461000e+03 | 7461.000000 | 7.621000e+03 | 7.621000e+03 | ... | 1.702000e+03 | 1.702000e+03 | 1.702000e+03 | 3.733000e+03 | 3.734000e+03 | 3.734000e+03 | 1.600000e+02 | 1.600000e+02 | 160.000000 | 5.919000e+03 |
| mean | 9.188171e+06 | 4.792358e+05 | 2282.872064 | 7.414415e+06 | 1.773755e+06 | 3.620156e+06 | 3.168416e+06 | 1162.978019 | 1.044669e+06 | 8.126553e+06 | ... | 8.773958e+06 | 7.442161e+06 | 5.641605e+06 | 1.395895e+06 | 2.916515e+06 | 2.627444e+06 | 4.461687e+07 | 3.951018e+07 | 12370.543750 | 4.547842e+06 |
| std | 3.746180e+07 | 1.911511e+06 | 7275.973730 | 2.995209e+07 | 7.570382e+06 | 1.737938e+07 | 1.515310e+07 | 5931.353995 | 4.452259e+06 | 3.298414e+07 | ... | 2.660829e+07 | 2.225999e+07 | 1.681650e+07 | 5.501454e+06 | 9.567607e+06 | 8.192225e+06 | 3.950749e+07 | 3.417684e+07 | 12485.026753 | 1.834182e+07 |
| min | 7.000000e+00 | 0.000000e+00 | 0.000000 | 7.000000e+00 | 0.000000e+00 | 0.000000e+00 | 2.000000e+00 | 0.000000 | 0.000000e+00 | 7.000000e+00 | ... | 2.662400e+04 | 1.681500e+04 | 9.994000e+03 | 1.059000e+03 | 1.136000e+03 | 5.580000e+02 | 2.375700e+04 | 2.451700e+04 | 2.000000 | 7.000000e+00 |
| 25% | 1.356570e+05 | 6.004000e+03 | 69.000000 | 1.166320e+05 | 1.283100e+04 | 5.655500e+04 | 5.210700e+04 | 8.000000 | 0.000000e+00 | 1.331340e+05 | ... | 4.344842e+05 | 2.326275e+05 | 1.285605e+05 | 5.655400e+04 | 9.248225e+04 | 5.615975e+04 | 5.739350e+06 | 5.023407e+06 | 1278.750000 | 7.427550e+04 |
| 50% | 8.182020e+05 | 4.547000e+04 | 597.000000 | 6.614590e+05 | 1.388180e+05 | 3.897850e+05 | 3.342380e+05 | 113.000000 | 1.185100e+04 | 7.567360e+05 | ... | 3.095970e+06 | 2.695938e+06 | 1.805696e+06 | 2.947270e+05 | 8.330395e+05 | 7.887425e+05 | 3.716590e+07 | 3.365402e+07 | 8007.500000 | 4.022880e+05 |
| 75% | 6.625243e+06 | 3.428690e+05 | 1708.000000 | 5.387805e+06 | 1.166434e+06 | 2.735777e+06 | 2.561513e+06 | 800.000000 | 7.579300e+05 | 6.007817e+06 | ... | 7.366241e+06 | 6.969726e+06 | 5.294763e+06 | 9.105160e+05 | 2.499280e+06 | 2.337874e+06 | 7.441663e+07 | 6.685368e+07 | 19851.000000 | 3.501562e+06 |
| max | 5.132284e+08 | 3.501031e+07 | 73933.000000 | 4.001504e+08 | 1.130780e+08 | 2.701636e+08 | 2.395186e+08 | 98275.000000 | 6.236742e+07 | 4.468251e+08 | ... | 2.243304e+08 | 1.667575e+08 | 1.186927e+08 | 9.224315e+07 | 9.096888e+07 | 6.731098e+07 | 1.349420e+08 | 1.156684e+08 | 46462.000000 | 2.506569e+08 |
8 rows × 22 columns
In [9]:
covid_df.drop(["Sno" , "Time", "ConfirmedIndianNational", "ConfirmedForeignNational"], inplace = True , axis=1)
In [10]:
covid_df.head(10)
Out[10]:
| Date | State/UnionTerritory | Cured | Deaths | Confirmed | |
|---|---|---|---|---|---|
| 0 | 2020-01-30 | Kerala | 0.0 | 0.0 | 1.0 |
| 1 | 2020-01-31 | Kerala | 0.0 | 0.0 | 1.0 |
| 2 | 2020-02-01 | Kerala | 0.0 | 0.0 | 2.0 |
| 3 | 2020-02-02 | Kerala | 0.0 | 0.0 | 3.0 |
| 4 | 2020-02-03 | Kerala | 0.0 | 0.0 | 3.0 |
| 5 | 2020-02-04 | Kerala | 0.0 | 0.0 | 3.0 |
| 6 | 2020-02-05 | Kerala | 0.0 | 0.0 | 3.0 |
| 7 | 2020-02-06 | Kerala | 0.0 | 0.0 | 3.0 |
| 8 | 2020-02-07 | Kerala | 0.0 | 0.0 | 3.0 |
| 9 | 2020-02-08 | Kerala | 0.0 | 0.0 | 3.0 |
In [11]:
covid_df = covid_df.dropna()
In [12]:
#Active Cases = Confirmed - (Cured + Deaths)
covid_df['Active_Cases'] = covid_df['Confirmed'] - covid_df['Cured'] + covid_df['Deaths']
In [13]:
covid_df.head(10)
Out[13]:
| Date | State/UnionTerritory | Cured | Deaths | Confirmed | Active_Cases | |
|---|---|---|---|---|---|---|
| 0 | 2020-01-30 | Kerala | 0.0 | 0.0 | 1.0 | 1.0 |
| 1 | 2020-01-31 | Kerala | 0.0 | 0.0 | 1.0 | 1.0 |
| 2 | 2020-02-01 | Kerala | 0.0 | 0.0 | 2.0 | 2.0 |
| 3 | 2020-02-02 | Kerala | 0.0 | 0.0 | 3.0 | 3.0 |
| 4 | 2020-02-03 | Kerala | 0.0 | 0.0 | 3.0 | 3.0 |
| 5 | 2020-02-04 | Kerala | 0.0 | 0.0 | 3.0 | 3.0 |
| 6 | 2020-02-05 | Kerala | 0.0 | 0.0 | 3.0 | 3.0 |
| 7 | 2020-02-06 | Kerala | 0.0 | 0.0 | 3.0 | 3.0 |
| 8 | 2020-02-07 | Kerala | 0.0 | 0.0 | 3.0 | 3.0 |
| 9 | 2020-02-08 | Kerala | 0.0 | 0.0 | 3.0 | 3.0 |
In [14]:
covid_df.tail(10)
Out[14]:
| Date | State/UnionTerritory | Cured | Deaths | Confirmed | Active_Cases | |
|---|---|---|---|---|---|---|
| 15076 | 2021-05-19 | Puducherry | 69060.0 | 1212.0 | 87749.0 | 19901.0 |
| 15077 | 2021-05-19 | Punjab | 427058.0 | 12317.0 | 511652.0 | 96911.0 |
| 15078 | 2021-05-19 | Rajasthan | 713129.0 | 7080.0 | 879664.0 | 173615.0 |
| 15079 | 2021-05-19 | Sikkim | 8427.0 | 212.0 | 11689.0 | 3474.0 |
| 15080 | 2021-05-19 | Tamil Nadu | 1403052.0 | 18369.0 | 1664350.0 | 279667.0 |
| 15081 | 2021-05-19 | Telangana | 485644.0 | 3012.0 | 536766.0 | 54134.0 |
| 15082 | 2021-05-19 | Tripura | 36402.0 | 450.0 | 42776.0 | 6824.0 |
| 15083 | 2021-05-19 | Uttarakhand | 214426.0 | 5132.0 | 295790.0 | 86496.0 |
| 15084 | 2021-05-19 | Uttar Pradesh | 1483249.0 | 18072.0 | 1637663.0 | 172486.0 |
| 15085 | 2021-05-19 | West Bengal | 1026492.0 | 13576.0 | 1171861.0 | 158945.0 |
In [15]:
statewise = pd.pivot_table(covid_df, values = ["Confirmed", "Cured", "Deaths"], index = "State/UnionTerritory", aggfunc = "max")
In [16]:
statewise["Recovery_rate"] = statewise["Cured"]*100/statewise["Confirmed"]
In [17]:
statewise["Mortality_rate"] = statewise["Deaths"]*100/statewise["Confirmed"]
In [18]:
statewise = statewise.sort_values(by = "Confirmed", ascending = False)
In [19]:
statewise.style.background_gradient(cmap = "Reds")
Out[19]:
| Confirmed | Cured | Deaths | Recovery_rate | Mortality_rate | |
|---|---|---|---|---|---|
| State/UnionTerritory | |||||
| Maharashtra | 5433506.000000 | 4927480.000000 | 83777.000000 | 90.686934 | 1.541859 |
| Karnataka | 2272374.000000 | 1674487.000000 | 22838.000000 | 73.688882 | 1.005028 |
| Kerala | 2200706.000000 | 1846105.000000 | 6612.000000 | 83.886944 | 0.300449 |
| Tamil Nadu | 1664350.000000 | 1403052.000000 | 18369.000000 | 84.300297 | 1.103674 |
| Uttar Pradesh | 1637663.000000 | 1483249.000000 | 18072.000000 | 90.571076 | 1.103524 |
| Andhra Pradesh | 1475372.000000 | 1254291.000000 | 9580.000000 | 85.015237 | 0.649328 |
| Delhi | 1402873.000000 | 1329899.000000 | 22111.000000 | 94.798246 | 1.576123 |
| West Bengal | 1171861.000000 | 1026492.000000 | 13576.000000 | 87.595030 | 1.158499 |
| Chhattisgarh | 925531.000000 | 823113.000000 | 12036.000000 | 88.934136 | 1.300443 |
| Rajasthan | 879664.000000 | 713129.000000 | 7080.000000 | 81.068340 | 0.804853 |
| Gujarat | 766201.000000 | 660489.000000 | 9269.000000 | 86.203098 | 1.209735 |
| Madhya Pradesh | 742718.000000 | 652612.000000 | 7139.000000 | 87.868074 | 0.961199 |
| Haryana | 709689.000000 | 626852.000000 | 6923.000000 | 88.327704 | 0.975498 |
| Bihar | 664115.000000 | 595377.000000 | 4039.000000 | 89.649684 | 0.608178 |
| Odisha | 633302.000000 | 536595.000000 | 2357.000000 | 84.729718 | 0.372176 |
| Telangana | 536766.000000 | 485644.000000 | 3012.000000 | 90.475924 | 0.561138 |
| Punjab | 511652.000000 | 427058.000000 | 12317.000000 | 83.466497 | 2.407300 |
| Telengana | 443360.000000 | 362160.000000 | 2312.000000 | 81.685312 | 0.521472 |
| Assam | 340858.000000 | 290774.000000 | 2344.000000 | 85.306491 | 0.687676 |
| Jharkhand | 320934.000000 | 284805.000000 | 4601.000000 | 88.742545 | 1.433628 |
| Uttarakhand | 295790.000000 | 214426.000000 | 5132.000000 | 72.492647 | 1.735015 |
| Jammu and Kashmir | 251919.000000 | 197701.000000 | 3293.000000 | 78.478003 | 1.307166 |
| Himachal Pradesh | 166678.000000 | 129330.000000 | 2460.000000 | 77.592724 | 1.475900 |
| Goa | 138776.000000 | 112633.000000 | 2197.000000 | 81.161728 | 1.583127 |
| Puducherry | 87749.000000 | 69060.000000 | 1212.000000 | 78.701752 | 1.381212 |
| Chandigarh | 56513.000000 | 48831.000000 | 647.000000 | 86.406667 | 1.144869 |
| Tripura | 42776.000000 | 36402.000000 | 450.000000 | 85.099121 | 1.051992 |
| Manipur | 40683.000000 | 33466.000000 | 612.000000 | 82.260404 | 1.504314 |
| Meghalaya | 24872.000000 | 19185.000000 | 355.000000 | 77.134931 | 1.427308 |
| Arunachal Pradesh | 22462.000000 | 19977.000000 | 88.000000 | 88.936871 | 0.391773 |
| Nagaland | 18714.000000 | 14079.000000 | 228.000000 | 75.232446 | 1.218339 |
| Ladakh | 16784.000000 | 15031.000000 | 170.000000 | 89.555529 | 1.012869 |
| Sikkim | 11689.000000 | 8427.000000 | 212.000000 | 72.093421 | 1.813671 |
| Dadra and Nagar Haveli and Daman and Diu | 9652.000000 | 8944.000000 | 4.000000 | 92.664733 | 0.041442 |
| Cases being reassigned to states | 9265.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| Mizoram | 9252.000000 | 7094.000000 | 29.000000 | 76.675313 | 0.313446 |
| Andaman and Nicobar Islands | 6674.000000 | 6359.000000 | 92.000000 | 95.280192 | 1.378484 |
| Lakshadweep | 5212.000000 | 3915.000000 | 15.000000 | 75.115119 | 0.287797 |
| Unassigned | 77.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| Daman & Diu | 2.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
In [20]:
#Top 10 states with active cases
top10_Active_Cases= covid_df.groupby(by = "State/UnionTerritory").max()[['Active_Cases','Date']].sort_values(by = ["Active_Cases"], ascending = False).reset_index()
In [21]:
fig = plt.figure(figsize=(16,9))
plt.title("Top 10 States with most active cases in India",size = 25)
colors = sns.color_palette("tab10",n_colors=len(top10_Active_Cases))
ax = sns.barplot(data = top10_Active_Cases.iloc[:10],
y = "Active_Cases" , x = "State/UnionTerritory", hue= "State/UnionTerritory",
palette=colors,linewidth = 2, legend = False,
hue_order=top10_Active_Cases["State/UnionTerritory"].unique())
plt.xlabel=("State/UnionTerritory",)
plt.ylabel=("Active_Cases")
plt.show()
In [22]:
#Top states with highest deaths
top10_deaths = covid_df.groupby(by = "State/UnionTerritory").max()[["Deaths", "Date"]].sort_values(by = ["Deaths"], ascending = False).reset_index()
In [23]:
fig = plt.figure(figsize=(18,5))
plt.title("Top 10 States with most death cases in India",size = 25)
colors = sns.color_palette("tab10",n_colors=len(top10_deaths))
ax = sns.barplot(data = top10_deaths.iloc[:12],
y = "Deaths" , x = "State/UnionTerritory", hue= "State/UnionTerritory",
palette=colors,linewidth = 2, legend = False,
hue_order=top10_deaths["State/UnionTerritory"].unique())
plt.xlabel=("State/UnionTerritory",)
plt.ylabel=("Deaths")
plt.show()
In [24]:
#Top 5 Affected States
fig = plt.figure(figsize=(20,8))
ax = sns.lineplot(data = covid_df[covid_df["State/UnionTerritory"].isin(['Maharashtra','Karnataka','Kerala','Uttar Pradesh','Tamil Nadu'])],x = 'Date', y ='Active_Cases', hue = 'State/UnionTerritory')
ax.set_title("Top 5 Affected States", size = 16)
Out[24]:
Text(0.5, 1.0, 'Top 5 Affected States')
In [25]:
vaccine_df.head(10)
Out[25]:
| Updated On | State | Total Doses Administered | Sessions | Sites | First Dose Administered | Second Dose Administered | Male (Doses Administered) | Female (Doses Administered) | Transgender (Doses Administered) | ... | 18-44 Years (Doses Administered) | 45-60 Years (Doses Administered) | 60+ Years (Doses Administered) | 18-44 Years(Individuals Vaccinated) | 45-60 Years(Individuals Vaccinated) | 60+ Years(Individuals Vaccinated) | Male(Individuals Vaccinated) | Female(Individuals Vaccinated) | Transgender(Individuals Vaccinated) | Total Individuals Vaccinated | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 16/01/2021 | India | 48276.0 | 3455.0 | 2957.0 | 48276.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 23757.0 | 24517.0 | 2.0 | 48276.0 |
| 1 | 17/01/2021 | India | 58604.0 | 8532.0 | 4954.0 | 58604.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 27348.0 | 31252.0 | 4.0 | 58604.0 |
| 2 | 18/01/2021 | India | 99449.0 | 13611.0 | 6583.0 | 99449.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 41361.0 | 58083.0 | 5.0 | 99449.0 |
| 3 | 19/01/2021 | India | 195525.0 | 17855.0 | 7951.0 | 195525.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 81901.0 | 113613.0 | 11.0 | 195525.0 |
| 4 | 20/01/2021 | India | 251280.0 | 25472.0 | 10504.0 | 251280.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 98111.0 | 153145.0 | 24.0 | 251280.0 |
| 5 | 21/01/2021 | India | 365965.0 | 32226.0 | 12600.0 | 365965.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 132784.0 | 233143.0 | 38.0 | 365965.0 |
| 6 | 22/01/2021 | India | 549381.0 | 36988.0 | 14115.0 | 549381.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 193899.0 | 355402.0 | 80.0 | 549381.0 |
| 7 | 23/01/2021 | India | 759008.0 | 43076.0 | 15605.0 | 759008.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 267856.0 | 491049.0 | 103.0 | 759008.0 |
| 8 | 24/01/2021 | India | 835058.0 | 49851.0 | 18111.0 | 835058.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 296283.0 | 538647.0 | 128.0 | 835058.0 |
| 9 | 25/01/2021 | India | 1277104.0 | 55151.0 | 19682.0 | 1277104.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 444137.0 | 832766.0 | 201.0 | 1277104.0 |
10 rows × 24 columns
In [26]:
vaccine_df.tail(10)
Out[26]:
| Updated On | State | Total Doses Administered | Sessions | Sites | First Dose Administered | Second Dose Administered | Male (Doses Administered) | Female (Doses Administered) | Transgender (Doses Administered) | ... | 18-44 Years (Doses Administered) | 45-60 Years (Doses Administered) | 60+ Years (Doses Administered) | 18-44 Years(Individuals Vaccinated) | 45-60 Years(Individuals Vaccinated) | 60+ Years(Individuals Vaccinated) | Male(Individuals Vaccinated) | Female(Individuals Vaccinated) | Transgender(Individuals Vaccinated) | Total Individuals Vaccinated | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 7835 | 06/08/2021 | West Bengal | 31774086.0 | 930084.0 | 2485.0 | 22745949.0 | 9028137.0 | 16949389.0 | 14819919.0 | 4778.0 | ... | 11349241.0 | 11608035.0 | 8816810.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7836 | 07/08/2021 | West Bengal | 32029325.0 | 727057.0 | 1833.0 | 22959053.0 | 9070272.0 | 17066449.0 | 14958036.0 | 4840.0 | ... | 11526470.0 | 11661738.0 | 8841117.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7837 | 08/08/2021 | West Bengal | 32098768.0 | 271684.0 | 584.0 | 23017864.0 | 9080904.0 | 17098464.0 | 14995446.0 | 4858.0 | ... | 11573400.0 | 11676065.0 | 8849303.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7838 | 09/08/2021 | West Bengal | 32390378.0 | 866173.0 | 2086.0 | 23257417.0 | 9132961.0 | 17234284.0 | 15151152.0 | 4942.0 | ... | 11765330.0 | 11743594.0 | 8881454.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7839 | 10/08/2021 | West Bengal | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7840 | 11/08/2021 | West Bengal | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7841 | 12/08/2021 | West Bengal | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7842 | 13/08/2021 | West Bengal | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7843 | 14/08/2021 | West Bengal | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7844 | 15/08/2021 | West Bengal | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
10 rows × 24 columns
In [27]:
vaccine_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 7845 entries, 0 to 7844 Data columns (total 24 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Updated On 7845 non-null object 1 State 7845 non-null object 2 Total Doses Administered 7621 non-null float64 3 Sessions 7621 non-null float64 4 Sites 7621 non-null float64 5 First Dose Administered 7621 non-null float64 6 Second Dose Administered 7621 non-null float64 7 Male (Doses Administered) 7461 non-null float64 8 Female (Doses Administered) 7461 non-null float64 9 Transgender (Doses Administered) 7461 non-null float64 10 Covaxin (Doses Administered) 7621 non-null float64 11 CoviShield (Doses Administered) 7621 non-null float64 12 Sputnik V (Doses Administered) 2995 non-null float64 13 AEFI 5438 non-null float64 14 18-44 Years (Doses Administered) 1702 non-null float64 15 45-60 Years (Doses Administered) 1702 non-null float64 16 60+ Years (Doses Administered) 1702 non-null float64 17 18-44 Years(Individuals Vaccinated) 3733 non-null float64 18 45-60 Years(Individuals Vaccinated) 3734 non-null float64 19 60+ Years(Individuals Vaccinated) 3734 non-null float64 20 Male(Individuals Vaccinated) 160 non-null float64 21 Female(Individuals Vaccinated) 160 non-null float64 22 Transgender(Individuals Vaccinated) 160 non-null float64 23 Total Individuals Vaccinated 5919 non-null float64 dtypes: float64(22), object(2) memory usage: 1.4+ MB
In [28]:
vaccine_df.rename(columns={'Updated On' : 'Vaccine_Date'},inplace = True)
In [29]:
vaccine_df.head()
Out[29]:
| Vaccine_Date | State | Total Doses Administered | Sessions | Sites | First Dose Administered | Second Dose Administered | Male (Doses Administered) | Female (Doses Administered) | Transgender (Doses Administered) | ... | 18-44 Years (Doses Administered) | 45-60 Years (Doses Administered) | 60+ Years (Doses Administered) | 18-44 Years(Individuals Vaccinated) | 45-60 Years(Individuals Vaccinated) | 60+ Years(Individuals Vaccinated) | Male(Individuals Vaccinated) | Female(Individuals Vaccinated) | Transgender(Individuals Vaccinated) | Total Individuals Vaccinated | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 16/01/2021 | India | 48276.0 | 3455.0 | 2957.0 | 48276.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 23757.0 | 24517.0 | 2.0 | 48276.0 |
| 1 | 17/01/2021 | India | 58604.0 | 8532.0 | 4954.0 | 58604.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 27348.0 | 31252.0 | 4.0 | 58604.0 |
| 2 | 18/01/2021 | India | 99449.0 | 13611.0 | 6583.0 | 99449.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 41361.0 | 58083.0 | 5.0 | 99449.0 |
| 3 | 19/01/2021 | India | 195525.0 | 17855.0 | 7951.0 | 195525.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 81901.0 | 113613.0 | 11.0 | 195525.0 |
| 4 | 20/01/2021 | India | 251280.0 | 25472.0 | 10504.0 | 251280.0 | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 98111.0 | 153145.0 | 24.0 | 251280.0 |
5 rows × 24 columns
In [30]:
vaccine_df.isnull().sum()
Out[30]:
Vaccine_Date 0 State 0 Total Doses Administered 224 Sessions 224 Sites 224 First Dose Administered 224 Second Dose Administered 224 Male (Doses Administered) 384 Female (Doses Administered) 384 Transgender (Doses Administered) 384 Covaxin (Doses Administered) 224 CoviShield (Doses Administered) 224 Sputnik V (Doses Administered) 4850 AEFI 2407 18-44 Years (Doses Administered) 6143 45-60 Years (Doses Administered) 6143 60+ Years (Doses Administered) 6143 18-44 Years(Individuals Vaccinated) 4112 45-60 Years(Individuals Vaccinated) 4111 60+ Years(Individuals Vaccinated) 4111 Male(Individuals Vaccinated) 7685 Female(Individuals Vaccinated) 7685 Transgender(Individuals Vaccinated) 7685 Total Individuals Vaccinated 1926 dtype: int64
In [31]:
vaccination = vaccine_df.drop(columns = ["Sputnik V (Doses Administered)","AEFI","18-44 Years (Doses Administered)","45-60 Years (Doses Administered)","60+ Years (Doses Administered)"],axis =1)
In [32]:
vaccination.head()
Out[32]:
| Vaccine_Date | State | Total Doses Administered | Sessions | Sites | First Dose Administered | Second Dose Administered | Male (Doses Administered) | Female (Doses Administered) | Transgender (Doses Administered) | Covaxin (Doses Administered) | CoviShield (Doses Administered) | 18-44 Years(Individuals Vaccinated) | 45-60 Years(Individuals Vaccinated) | 60+ Years(Individuals Vaccinated) | Male(Individuals Vaccinated) | Female(Individuals Vaccinated) | Transgender(Individuals Vaccinated) | Total Individuals Vaccinated | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 16/01/2021 | India | 48276.0 | 3455.0 | 2957.0 | 48276.0 | 0.0 | NaN | NaN | NaN | 579.0 | 47697.0 | NaN | NaN | NaN | 23757.0 | 24517.0 | 2.0 | 48276.0 |
| 1 | 17/01/2021 | India | 58604.0 | 8532.0 | 4954.0 | 58604.0 | 0.0 | NaN | NaN | NaN | 635.0 | 57969.0 | NaN | NaN | NaN | 27348.0 | 31252.0 | 4.0 | 58604.0 |
| 2 | 18/01/2021 | India | 99449.0 | 13611.0 | 6583.0 | 99449.0 | 0.0 | NaN | NaN | NaN | 1299.0 | 98150.0 | NaN | NaN | NaN | 41361.0 | 58083.0 | 5.0 | 99449.0 |
| 3 | 19/01/2021 | India | 195525.0 | 17855.0 | 7951.0 | 195525.0 | 0.0 | NaN | NaN | NaN | 3017.0 | 192508.0 | NaN | NaN | NaN | 81901.0 | 113613.0 | 11.0 | 195525.0 |
| 4 | 20/01/2021 | India | 251280.0 | 25472.0 | 10504.0 | 251280.0 | 0.0 | NaN | NaN | NaN | 3946.0 | 247334.0 | NaN | NaN | NaN | 98111.0 | 153145.0 | 24.0 | 251280.0 |
In [33]:
#Males vs Females VACCINATION
male = vaccination["Male(Individuals Vaccinated)"].sum()
female = vaccination["Female(Individuals Vaccinated)"].sum()
fig = px.pie(names = ['Male','Female'],values =[male,female], title = "Male vs Female Vaccination Rate")
fig.show()
In [34]:
#Removing rows where State = India
vaccine = vaccine_df[vaccine_df.State!='India']
vaccine
Out[34]:
| Vaccine_Date | State | Total Doses Administered | Sessions | Sites | First Dose Administered | Second Dose Administered | Male (Doses Administered) | Female (Doses Administered) | Transgender (Doses Administered) | ... | 18-44 Years (Doses Administered) | 45-60 Years (Doses Administered) | 60+ Years (Doses Administered) | 18-44 Years(Individuals Vaccinated) | 45-60 Years(Individuals Vaccinated) | 60+ Years(Individuals Vaccinated) | Male(Individuals Vaccinated) | Female(Individuals Vaccinated) | Transgender(Individuals Vaccinated) | Total Individuals Vaccinated | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 212 | 16/01/2021 | Andaman and Nicobar Islands | 23.0 | 2.0 | 2.0 | 23.0 | 0.0 | 12.0 | 11.0 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 23.0 |
| 213 | 17/01/2021 | Andaman and Nicobar Islands | 23.0 | 2.0 | 2.0 | 23.0 | 0.0 | 12.0 | 11.0 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 23.0 |
| 214 | 18/01/2021 | Andaman and Nicobar Islands | 42.0 | 9.0 | 2.0 | 42.0 | 0.0 | 29.0 | 13.0 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 42.0 |
| 215 | 19/01/2021 | Andaman and Nicobar Islands | 89.0 | 12.0 | 2.0 | 89.0 | 0.0 | 53.0 | 36.0 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 89.0 |
| 216 | 20/01/2021 | Andaman and Nicobar Islands | 124.0 | 16.0 | 3.0 | 124.0 | 0.0 | 67.0 | 57.0 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 124.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7840 | 11/08/2021 | West Bengal | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7841 | 12/08/2021 | West Bengal | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7842 | 13/08/2021 | West Bengal | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7843 | 14/08/2021 | West Bengal | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7844 | 15/08/2021 | West Bengal | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
7633 rows × 24 columns
In [51]:
vaccine_copy = vaccine.copy()
vaccine_copy.rename(columns={"Total Individuals Vaccinated": "Total"}, inplace=True)
vaccine_copy
Out[51]:
| Vaccine_Date | State | Total Doses Administered | Sessions | Sites | First Dose Administered | Second Dose Administered | Male (Doses Administered) | Female (Doses Administered) | Transgender (Doses Administered) | ... | 18-44 Years (Doses Administered) | 45-60 Years (Doses Administered) | 60+ Years (Doses Administered) | 18-44 Years(Individuals Vaccinated) | 45-60 Years(Individuals Vaccinated) | 60+ Years(Individuals Vaccinated) | Male(Individuals Vaccinated) | Female(Individuals Vaccinated) | Transgender(Individuals Vaccinated) | Total | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 212 | 16/01/2021 | Andaman and Nicobar Islands | 23.0 | 2.0 | 2.0 | 23.0 | 0.0 | 12.0 | 11.0 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 23.0 |
| 213 | 17/01/2021 | Andaman and Nicobar Islands | 23.0 | 2.0 | 2.0 | 23.0 | 0.0 | 12.0 | 11.0 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 23.0 |
| 214 | 18/01/2021 | Andaman and Nicobar Islands | 42.0 | 9.0 | 2.0 | 42.0 | 0.0 | 29.0 | 13.0 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 42.0 |
| 215 | 19/01/2021 | Andaman and Nicobar Islands | 89.0 | 12.0 | 2.0 | 89.0 | 0.0 | 53.0 | 36.0 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 89.0 |
| 216 | 20/01/2021 | Andaman and Nicobar Islands | 124.0 | 16.0 | 3.0 | 124.0 | 0.0 | 67.0 | 57.0 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 124.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7840 | 11/08/2021 | West Bengal | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7841 | 12/08/2021 | West Bengal | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7842 | 13/08/2021 | West Bengal | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7843 | 14/08/2021 | West Bengal | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7844 | 15/08/2021 | West Bengal | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
7633 rows × 24 columns
In [49]:
max_vac = vaccine_copy.groupby('State')['Total'].sum().to_frame('Total')
max_vac = max_vac.sort_values('Total', ascending=False)[:5]
max_vac
Out[49]:
| Total | |
|---|---|
| State | |
| Maharashtra | 1.403075e+09 |
| Uttar Pradesh | 1.200575e+09 |
| Rajasthan | 1.141163e+09 |
| Gujarat | 1.078261e+09 |
| West Bengal | 9.250227e+08 |
In [52]:
fig = plt.figure(figsize=(18,5))
plt.title("Top 5 Vaccinated States",size = 25)
colors = sns.color_palette("tab10",n_colors=len(max_vac))
ax = sns.barplot(data = max_vac.iloc[:10],
y = max_vac.Total , x = max_vac.index, hue= "State",
palette=colors,linewidth = 2, legend = False)
plt.xlabel=("State",)
plt.ylabel=("Vaccination")
plt.show()
In [ ]: